EDA HINTS DATA

Code
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
library(gtsummary)
library(reshape2)
library(RColorBrewer)
library(reshape2)
library(ggplot2)
library(caret)

The HINTS survey includes 527 questions. To focus on sentiment toward healthcare and cancer, we selected the questions listed in Table 1. These questions ask participants where they access health information, whether they trust the health information they receive, and whether they feel frustrated when searching for it.

Table 1

Variable Description
SeekCancerInfo Have you ever looked for information about cancer from any source?
CancerFrustrated Based on the results of your most recent search for information about cancer, how much do you agree or disagree: You felt frustrated during your search for the information.
CancerTrustDoctor In general, how much would you trust information about cancer from a doctor?
CancerTrustFamily In general, how much would you trust information about cancer from family or friends?
CancerTrustGov In general, how much would you trust information about cancer from government health agencies?
CancerTrustCharities In general, how much would you trust information about cancer from charitable organizations?
CancerTrustReligiousOrgs In general, how much would you trust information about cancer from religious organizations and leaders?
CancerTrustScientists In general, how much would you trust information about cancer from scientists?
Electronic2_HealthInfo In the past 12 months have you used the Internet to look for health or medical information?
MisleadingHealthInfo How much of the health information that you see on social media do you think is false or misleading?
TrustHCSystem How much do you trust the health care system (for example, hospitals, pharmacies, and other organizations involved in health care)?
Code
# Location of the pre-cleaned HINTS extract used for the Spearman analysis.
file_path <- '../data/csv/hints_cleaned_forML_spearman.csv'

# Read the extract, discard the SeekCancerInfo and HHID columns, and keep
# only the rows that have no missing values in the remaining columns.
hints_cleaned <- na.omit(
  select(read.csv(file_path), -c(SeekCancerInfo, HHID))
)

# Optional sanity checks (uncomment to inspect the result):
# print(head(hints_cleaned))
# print(paste("Shape of the dataframe:", paste(dim(hints_cleaned), collapse = " x ")))
Code
# Convert the text responses in hints_cleaned to numeric codes.
# Coding direction in this section: 1 is the strongest trust / agreement and
# larger numbers mean less trust / more disagreement.
library(dplyr)

# Response label -> numeric code lookups
trust_mapping <- c("Not at all" = 4, "A little" = 3, "Some" = 2, "A lot" = 1)
agreement_mapping <- c("Strongly agree" = 1, "Somewhat agree" = 2, "Somewhat disagree" = 3, "Strongly disagree" = 4)
binary_mapping <- c("Yes" = 1, "No" = 2)
misleading_info_mapping <- c("I do not use social media" = 5, "None" = 4, "A little" = 3, "Some" = 2, "A lot" = 1)

# In the raw survey TrustHCSystem is answered with "Very" / "Somewhat" /
# "A little" / "Not at all" rather than "A lot" / "Some". Accept both label
# sets so those answers are not silently turned into NA.
# NOTE(review): which label set the cleaned CSV carries is not visible here.
hc_system_mapping <- c(trust_mapping, "Very" = 1, "Somewhat" = 2)

# Translate a vector of response labels into numeric codes.
#   x       character (or factor) vector of response labels
#   mapping named numeric vector: names are labels, values are codes
# Returns a numeric vector as long as x. Labels without an entry in `mapping`
# become NA, and a warning lists them so the gap is visible.
recode_labels <- function(x, mapping) {
  labels <- as.character(x)
  unmatched <- setdiff(labels[!is.na(labels)], names(mapping))
  if (length(unmatched) > 0) {
    warning(
      "Unmapped response labels coded as NA: ",
      paste(unmatched, collapse = ", "),
      call. = FALSE
    )
  }
  unname(mapping[labels])
}

# Cancer-information trust items that share the "A lot" ... "Not at all" scale
cancer_trust_columns <- c(
  "CancerTrustDoctor", "CancerTrustFamily", "CancerTrustGov",
  "CancerTrustCharities", "CancerTrustReligiousOrgs", "CancerTrustScientists"
)

# Apply the mappings and transformations
hints_cleaned <- hints_cleaned %>%
  filter(!is.na(MisleadingHealthInfo)) %>%
  mutate(
    CancerFrustrated = recode_labels(CancerFrustrated, agreement_mapping),
    across(all_of(cancer_trust_columns), ~ recode_labels(.x, trust_mapping)),
    TrustHCSystem = recode_labels(TrustHCSystem, hc_system_mapping),
    Electronic2_HealthInfo = recode_labels(Electronic2_HealthInfo, binary_mapping),
    MisleadingHealthInfo = recode_labels(MisleadingHealthInfo, misleading_info_mapping)
  )

# Display the transformed dataset
# cat("Data after applying mappings to numeric values:\n")
# print(head(hints_cleaned, n = 5), digits = 2)

# Display the data types of the columns
# cat("\nColumn types:\n")
# str(hints_cleaned)
Code
# Standardize the numeric columns of hints_cleaned, compute their Spearman
# correlation matrix, and rank every feature by its correlation with the
# target variable.

# vapply() guarantees a logical vector even for an empty data frame.
numeric_columns <- vapply(hints_cleaned, is.numeric, logical(1))

# drop = FALSE keeps a data frame (and its column name) even when only one
# column is numeric; otherwise the target check below would fail spuriously.
# Spearman correlation is rank-based, so scaling does not change the
# coefficients; it only puts the columns on a common scale.
standardized_data <- hints_cleaned[, numeric_columns, drop = FALSE] %>%
  scale() %>%
  as.data.frame()

# Define target variable
target_variable <- 'TrustHCSystem'  # Replace with your actual target column name

# Fail early if the target was not among the numeric columns
if (!(target_variable %in% colnames(standardized_data))) {
  stop(
    "Target variable '", target_variable, "' not found in the dataset.",
    call. = FALSE
  )
}

# Compute Spearman correlation matrix
correlation_matrix_spearman <- cor(standardized_data, method = "spearman")

# Second name for the same matrix, used by the code below
correlation_data <- correlation_matrix_spearman

# Display the full Spearman correlation matrix
# print("Spearman Correlation Matrix (excluding SeekCancerInfo and after standardization):")
# print(correlation_data)

# Correlation of every feature with the target, strongest positive first
correlation_with_target_spearman <- correlation_data[, target_variable] %>%
  sort(decreasing = TRUE)

# print(paste("\nSpearman correlation of features with", target_variable, ":"))
# print(correlation_with_target_spearman)
Code
# Blank out the diagonal and upper triangle so every pair of questions is
# shown once, then reshape the matrix to long format (Var1, Var2, value).
melted_correlation <- melt(
  replace(
    correlation_matrix_spearman,
    upper.tri(correlation_matrix_spearman, diag = TRUE),
    NA
  )
)

# 100-step colour ramp from white to dark teal
custom_colors <- colorRampPalette(c("#ffffff", "#3d6469"))(100)

# Heatmap of the lower triangle with the coefficient printed in each tile
ggplot(melted_correlation, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  geom_text(
    aes(label = ifelse(is.na(value), "", sprintf("%.2f", value))),
    color = "black",
    size = 2.5
  ) +
  scale_fill_gradientn(
    colours = custom_colors,
    limits = c(-1, 1),
    na.value = "white",
    name = "Spearman\nCorrelation"
  ) +
  coord_fixed() +
  labs(title = "Spearman Correlation Matrix (Ordinal Data)") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    axis.text.y = element_text(angle = 0, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "right"
  )

Code
# Load the public HINTS 6 release. The code below expects the .rda file to
# supply an object named `public`.
load('../data/HINTS6_R_20240524/hints6_public.rda')
hints <- as.data.frame(public)

# print(colnames(hints))

# Identifier, timestamp, and the survey questions analysed in this report
columns <- c(
  "HHID", "updatedate",
  "SeekCancerInfo", "CancerFrustrated",
  "CancerTrustDoctor", "CancerTrustFamily", "CancerTrustGov",
  "CancerTrustCharities", "CancerTrustReligiousOrgs", "CancerTrustScientists",
  "Electronic2_HealthInfo", "MisleadingHealthInfo", "TrustHCSystem"
)

hints_select <- select(hints, all_of(columns))
# hints_select$updatedate <- hints_select$updatedate / 1000
# hints_select$updatedate <- as_datetime(hints_select$updatedate)

head(hints_select)
      HHID  updatedate SeekCancerInfo                        CancerFrustrated
1 21000006 13870396800             No Inapplicable, coded 2 in SeekCancerInfo
2 21000009 13874630400             No Inapplicable, coded 2 in SeekCancerInfo
3 21000020 13873680000            Yes                       Somewhat disagree
4 21000022 13867891200             No Inapplicable, coded 2 in SeekCancerInfo
5 21000039 13866336000             No Inapplicable, coded 2 in SeekCancerInfo
6 21000043 13866595200             No Inapplicable, coded 2 in SeekCancerInfo
  CancerTrustDoctor              CancerTrustFamily
1             A lot Missing data (Not Ascertained)
2             A lot                           Some
3             A lot                           Some
4             A lot Missing data (Not Ascertained)
5              Some                           Some
6             A lot                           Some
                  CancerTrustGov           CancerTrustCharities
1 Missing data (Not Ascertained) Missing data (Not Ascertained)
2                          A lot                           Some
3                           Some                       A little
4 Missing data (Not Ascertained) Missing data (Not Ascertained)
5                           Some                     Not at all
6                           Some                          A lot
        CancerTrustReligiousOrgs          CancerTrustScientists
1 Missing data (Not Ascertained) Missing data (Not Ascertained)
2                           Some                          A lot
3                     Not at all                          A lot
4 Missing data (Not Ascertained) Missing data (Not Ascertained)
5                     Not at all                           Some
6                       A little                          A lot
                         Electronic2_HealthInfo      MisleadingHealthInfo
1 Question answered in error (Commission Error) I do not use social media
2                                           Yes I do not use social media
3                                           Yes                      Some
4          Inapplicable, coded 2 in UseInternet I do not use social media
5                                           Yes                     A lot
6                                           Yes                     A lot
  TrustHCSystem
1          Very
2          Very
3      Somewhat
4      Somewhat
5      Somewhat
6      A little

Survey Responses

The bar graphs below give a first look at the data and a general overview of the responses to each question. These plots show how much participants agree or disagree with each question. For example, many participants trust doctors a lot, and trust family members less.

Code
# Count how often each response occurs for each survey question:
# drop the identifier and timestamp, stack the remaining columns into
# (Variable, Value) pairs, then tally every pair.
plot_data <- count(
  pivot_longer(
    select(hints_select, -c(HHID, updatedate)),
    cols = everything(),
    names_to = "Variable",
    values_to = "Value"
  ),
  Variable, Value
)

# List every distinct response label, including the missing-data codes
print(unique(plot_data$Value))
 [1] Missing data (Not Ascertained)                  
 [2] Missing data (Filter Missing)                   
 [3] Multiple responses selected in error            
 [4] Question answered in error (Commission Error)   
 [5] Inapplicable, coded 2 in SeekCancerInfo         
 [6] Strongly agree                                  
 [7] Somewhat agree                                  
 [8] Somewhat disagree                               
 [9] Strongly disagree                               
[10] A lot                                           
[11] Some                                            
[12] A little                                        
[13] Not at all                                      
[14] Yes                                             
[15] No                                              
[16] Inapplicable, coded 2 in UseInternet            
[17] Missing data (Web partial - Question Never Seen)
[18] None                                            
[19] I do not use social media                       
[20] Very                                            
[21] Somewhat                                        
21 Levels: Missing data (Not Ascertained) Yes ... Somewhat
Code
# Substantive response labels to keep; every other label in plot_data
# (missing, inapplicable, error codes) is filtered out.
values <- c(
  "Strongly agree", "Somewhat agree", "Somewhat disagree", "Strongly disagree",
  "A lot", "Some", "A little", "Not at all",
  "Yes", "No",
  "None", "I do not use social media",
  "Very", "Somewhat"
)

# Keep the listed responses and rebuild the factor from the values that remain
plot_data_filtered <- plot_data %>%
  filter(Value %in% values) %>%
  mutate(Value = factor(Value, levels = sort(unique(Value))))

# Split the questions into three groups, one figure per group
columns_1 <- c("SeekCancerInfo", "CancerFrustrated", "CancerTrustDoctor", "CancerTrustFamily")
columns_2 <- c("CancerTrustGov", "CancerTrustCharities", "CancerTrustReligiousOrgs", "CancerTrustScientists")
columns_3 <- c("Electronic2_HealthInfo", "MisleadingHealthInfo", "TrustHCSystem")

plot_data_filtered_1 <- filter(plot_data_filtered, Variable %in% columns_1)
plot_data_filtered_2 <- filter(plot_data_filtered, Variable %in% columns_2)
plot_data_filtered_3 <- filter(plot_data_filtered, Variable %in% columns_3)

# Bar chart of response counts for the first group, one panel per question
p <- ggplot(plot_data_filtered_1, aes(x = Value, y = n, fill = Variable)) +
  geom_col() +  # bar heights come straight from the pre-computed counts
  facet_wrap(vars(Variable), scales = "free_x") +
  labs(
    title = "HINTS Survey Responses",
    x = "Responses",
    y = "Count",
    fill = "Question"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

p

Code
# Write the current plot `p` to an 800 x 600 PNG (png()'s default units).
png(
  filename = "../data/HINTS6_R_20240524/HINTS_plot1.png",
  width = 800,
  height = 600
)
print(p)  # draw the plot onto the open PNG device
dev.off()
quartz_off_screen 
                2 
Code
# Bar chart of response counts for the second group of questions,
# one panel per question with its own x axis.
p <- ggplot(plot_data_filtered_2, aes(x = Value, y = n, fill = Variable)) +
  geom_col() +  # bar heights come straight from the pre-computed counts
  facet_wrap(vars(Variable), scales = "free_x") +
  labs(
    title = "HINTS Survey Responses",
    x = "Responses",
    y = "Count",
    fill = "Question"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

p

Code
# Write the current plot `p` to an 800 x 600 PNG (png()'s default units).
png(
  filename = "../data/HINTS6_R_20240524/HINTS_plot2.png",
  width = 800,
  height = 600
)
print(p)  # draw the plot onto the open PNG device
dev.off()
quartz_off_screen 
                2 
Code
# Bar chart of response counts for the third group of questions,
# panels arranged in two rows, each with its own x axis.
p <- ggplot(plot_data_filtered_3, aes(x = Value, y = n, fill = Variable)) +
  geom_col() +  # bar heights come straight from the pre-computed counts
  facet_wrap(vars(Variable), scales = "free_x", nrow = 2) +
  labs(
    title = "HINTS Survey Responses",
    x = "Responses",
    y = "Count",
    fill = "Question"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

p

Code
# Write the current plot `p` to an 800 x 600 PNG (png()'s default units).
png(
  filename = "../data/HINTS6_R_20240524/HINTS_plot3.png",
  width = 800,
  height = 600
)
print(p)  # draw the plot onto the open PNG device
dev.off()
quartz_off_screen 
                2 

Summary Statistics of the Responses

The responses to the questions can be coded on a scale from 0 to 3. For example, “Not at all” is coded as 0, “A little” is coded as 1, “Some” is coded as 2, and “A lot” is coded as 3. After coding these responses, the mean, median, and mode are calculated to highlight overall trends in the data.

Code
# Distinct responses recorded for MisleadingHealthInfo (missing codes included)
hints_select$MisleadingHealthInfo %>%
  unique() %>%
  print()
[1] I do not use social media                       
[2] Some                                            
[3] A lot                                           
[4] A little                                        
[5] None                                            
[6] Missing data (Not Ascertained)                  
[7] Missing data (Web partial - Question Never Seen)
7 Levels: Missing data (Not Ascertained) ...
Code
# Column names of the selected survey subset
print(names(hints_select))
 [1] "HHID"                     "updatedate"              
 [3] "SeekCancerInfo"           "CancerFrustrated"        
 [5] "CancerTrustDoctor"        "CancerTrustFamily"       
 [7] "CancerTrustGov"           "CancerTrustCharities"    
 [9] "CancerTrustReligiousOrgs" "CancerTrustScientists"   
[11] "Electronic2_HealthInfo"   "MisleadingHealthInfo"    
[13] "TrustHCSystem"           
Code
# Recode the text responses in hints_select to numbers: ordinal items use a
# 0-3 scale and Yes/No items become 1/0. Any label without an entry in the
# lookups below (for example the "Missing data ..." and "Inapplicable ..."
# levels) becomes NA.

# Translate a vector of response labels into numeric codes.
#   x     character or factor vector of response labels
#   codes named numeric vector: names are labels, values are codes
# Returns a numeric vector as long as x; unlisted labels yield NA.
code_responses <- function(x, codes) {
  unname(codes[as.character(x)])
}

agreement_codes <- c(
  "Strongly disagree" = 0, "Somewhat disagree" = 1,
  "Somewhat agree" = 2, "Strongly agree" = 3
)
trust_codes <- c("Not at all" = 0, "A little" = 1, "Some" = 2, "A lot" = 3)
yes_no_codes <- c("No" = 0, "Yes" = 1)
# NOTE(review): "I do not use social media" shares code 0 with "None", so the
# two answers cannot be told apart afterwards -- confirm this is intended.
misleading_codes <- c(
  "None" = 0, "I do not use social media" = 0,
  "A little" = 1, "Some" = 2, "A lot" = 3
)
# TrustHCSystem uses its own wording ("Somewhat" / "Very")
hc_system_codes <- c("Not at all" = 0, "A little" = 1, "Somewhat" = 2, "Very" = 3)

# Every cancer-information trust item shares one response scale.
# CancerTrustFamily previously matched "None" as its lowest label; that label
# is not an answer to this question, so "Not at all" responses were coded NA
# instead of 0. Sharing trust_codes fixes that.
cancer_trust_items <- c(
  "CancerTrustDoctor", "CancerTrustFamily", "CancerTrustGov",
  "CancerTrustCharities", "CancerTrustReligiousOrgs", "CancerTrustScientists"
)
yes_no_items <- c("SeekCancerInfo", "Electronic2_HealthInfo")

hints_select_coded <- hints_select %>%
  mutate(
    CancerFrustrated = code_responses(CancerFrustrated, agreement_codes),
    across(all_of(cancer_trust_items), ~ code_responses(.x, trust_codes)),
    across(all_of(yes_no_items), ~ code_responses(.x, yes_no_codes)),
    MisleadingHealthInfo = code_responses(MisleadingHealthInfo, misleading_codes),
    TrustHCSystem = code_responses(TrustHCSystem, hc_system_codes)
  )


print(head(hints_select_coded))
      HHID  updatedate SeekCancerInfo CancerFrustrated CancerTrustDoctor
1 21000006 13870396800              0               NA                 3
2 21000009 13874630400              0               NA                 3
3 21000020 13873680000              1                1                 3
4 21000022 13867891200              0               NA                 3
5 21000039 13866336000              0               NA                 2
6 21000043 13866595200              0               NA                 3
  CancerTrustFamily CancerTrustGov CancerTrustCharities
1                NA             NA                   NA
2                 2              3                    2
3                 2              2                    1
4                NA             NA                   NA
5                 2              2                    0
6                 2              2                    3
  CancerTrustReligiousOrgs CancerTrustScientists Electronic2_HealthInfo
1                       NA                    NA                     NA
2                        2                     3                      1
3                        0                     3                      1
4                       NA                    NA                     NA
5                        0                     2                      1
6                        1                     3                      1
  MisleadingHealthInfo TrustHCSystem
1                    0             3
2                    0             3
3                    2             2
4                    0             2
5                    3             2
6                    3             1

In the summary table below, the mean for trusting a doctor is higher than the mean for trusting the government. Given this information, we will also look at the Reddit dataset to compare the level of trust users express when they mention the government in their comments versus when they mention doctors. In addition, the mean frustration score among people who searched for cancer information is approximately 1.105 on the 0–3 scale. In the Reddit dataset, we also look for an equivalent in textual data by running positive/negative and emotion sentiment analysis on comments that include the word “cancer”.

Code
# Per-column summary of the coded responses (quartiles, mean, NA counts)
hints_select_coded %>%
  summary() %>%
  print()
     HHID             updatedate        SeekCancerInfo   CancerFrustrated
 Length:6252        Min.   :1.387e+10   Min.   :0.0000   Min.   :0.000   
 Class :character   1st Qu.:1.387e+10   1st Qu.:0.0000   1st Qu.:0.000   
 Mode  :character   Median :1.387e+10   Median :0.0000   Median :1.000   
                    Mean   :1.387e+10   Mean   :0.4654   Mean   :1.105   
                    3rd Qu.:1.387e+10   3rd Qu.:1.0000   3rd Qu.:2.000   
                    Max.   :1.389e+10   Max.   :1.0000   Max.   :3.000   
                                        NA's   :17       NA's   :3420    
 CancerTrustDoctor CancerTrustFamily CancerTrustGov CancerTrustCharities
 Min.   :0.000     Min.   :1.000     Min.   :0.00   Min.   :0.000       
 1st Qu.:2.000     1st Qu.:1.000     1st Qu.:1.00   1st Qu.:1.000       
 Median :3.000     Median :2.000     Median :2.00   Median :1.000       
 Mean   :2.656     Mean   :1.678     Mean   :1.92   Mean   :1.403       
 3rd Qu.:3.000     3rd Qu.:2.000     3rd Qu.:3.00   3rd Qu.:2.000       
 Max.   :3.000     Max.   :3.000     Max.   :3.00   Max.   :3.000       
 NA's   :94        NA's   :783       NA's   :273    NA's   :308         
 CancerTrustReligiousOrgs CancerTrustScientists Electronic2_HealthInfo
 Min.   :0.0000           Min.   :0.000         Min.   :0.0000        
 1st Qu.:0.0000           1st Qu.:2.000         1st Qu.:1.0000        
 Median :1.0000           Median :3.000         Median :1.0000        
 Mean   :0.9484           Mean   :2.357         Mean   :0.8534        
 3rd Qu.:2.0000           3rd Qu.:3.000         3rd Qu.:1.0000        
 Max.   :3.0000           Max.   :3.000         Max.   :1.0000        
 NA's   :280              NA's   :218           NA's   :1130          
 MisleadingHealthInfo TrustHCSystem
 Min.   :0.000        Min.   :0.0  
 1st Qu.:1.000        1st Qu.:2.0  
 Median :2.000        Median :2.0  
 Mean   :1.716        Mean   :2.2  
 3rd Qu.:3.000        3rd Qu.:3.0  
 Max.   :3.000        Max.   :3.0  
 NA's   :82           NA's   :134  
Code
# Keep only respondents with a coded answer to every question.
# NOTE(review): the rendered table has N = 2,315 and SeekCancerInfo is 1 for
# all rows, so this complete-case filter keeps only people who searched for
# cancer information -- confirm that restriction is intended.
hints_select_coded_clean <- hints_select_coded %>%
  drop_na()

# Summary table of the coded survey questions (identifier and timestamp
# excluded).
# NOTE(review): the rendered table reports n (%) for every variable, so the
# "{mean} ± {sd}" statistic requested for continuous variables does not appear
# to be applied to any of them -- confirm how the variables should be typed.
table1 <- tbl_summary(
  select(hints_select_coded_clean, -c(HHID, updatedate)),
  statistic = all_continuous() ~ "{mean} ± {sd}"
)

table1
Characteristic N = 2,3151
SeekCancerInfo
    1 2,315 (100%)
CancerFrustrated
    0 759 (33%)
    1 780 (34%)
    2 624 (27%)
    3 152 (6.6%)
CancerTrustDoctor
    0 11 (0.5%)
    1 72 (3.1%)
    2 441 (19%)
    3 1,791 (77%)
CancerTrustFamily
    1 970 (42%)
    2 1,225 (53%)
    3 120 (5.2%)
CancerTrustGov
    0 133 (5.7%)
    1 357 (15%)
    2 1,065 (46%)
    3 760 (33%)
CancerTrustCharities
    0 251 (11%)
    1 798 (34%)
    2 1,086 (47%)
    3 180 (7.8%)
CancerTrustReligiousOrgs
    0 907 (39%)
    1 832 (36%)
    2 494 (21%)
    3 82 (3.5%)
CancerTrustScientists
    0 41 (1.8%)
    1 165 (7.1%)
    2 608 (26%)
    3 1,501 (65%)
Electronic2_HealthInfo 2,203 (95%)
MisleadingHealthInfo
    0 287 (12%)
    1 287 (12%)
    2 959 (41%)
    3 782 (34%)
TrustHCSystem
    0 60 (2.6%)
    1 231 (10.0%)
    2 1,109 (48%)
    3 915 (40%)
1 n (%)

The box plot visualizes the median, interquartile range, and outliers of each coded variable.

Code
# Coded survey answers without the identifier and timestamp columns
boxplot_data <- select(hints_select_coded, -c(HHID, updatedate))

# One row per (question, coded answer) pair for plotting
boxplot_data_long <- pivot_longer(
  boxplot_data,
  cols = everything(),
  names_to = "Variable",
  values_to = "Value"
)

# One box per question; outliers in red. The y axis is zoomed to -1..4
# without discarding any data.
ggplot(boxplot_data_long, aes(x = Variable, y = Value)) +
  geom_boxplot(outlier.colour = "red", outlier.size = 1) +
  coord_cartesian(ylim = c(-1, 4)) +
  labs(
    title = "HINTS Boxplot",
    x = "Variables",
    y = "Values"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Warning: Removed 6739 rows containing non-finite outside the scale range
(`stat_boxplot()`).

Relationship between different survey questions.

Using a correlation plot, we evaluate the relationships between the different survey questions. In the correlation plot below, we see a negative correlation between trust in doctors and trust in the government, scientists, and the healthcare system. Further statistical testing can be performed to better understand this initial evaluation.

Code
library(corrplot)
corrplot 0.95 loaded
Code
# Correlation matrix of the coded survey questions, computed on respondents
# with a coded answer to every question (complete cases).
cor_matrix <- local({
  # Same complete-case rows as before; HHID and updatedate are then removed
  # because they are an identifier and a timestamp, not survey answers.
  complete_cases <- drop_na(hints_select_coded) %>%
    select(-HHID, -updatedate)

  is_numeric <- vapply(complete_cases, is.numeric, logical(1))
  survey_items <- complete_cases[, is_numeric, drop = FALSE]

  # A column that is constant within the complete cases has zero standard
  # deviation, so its correlations are undefined (cor() warns and returns NA).
  # SeekCancerInfo is such a column: CancerFrustrated is only coded for
  # respondents who answered "Yes". Leave these columns out and say so.
  varies <- vapply(
    survey_items,
    function(item) isTRUE(sd(item) > 0),
    logical(1)
  )
  if (!all(varies)) {
    message(
      "Dropping constant column(s) from the correlation: ",
      paste(names(varies)[!varies], collapse = ", ")
    )
  }

  # No `use =` argument needed: rows with NA were already removed above.
  cor(survey_items[, varies, drop = FALSE])
})
Warning in cor(cor_matrix[, sapply(cor_matrix, is.numeric)], use =
"complete.obs"): the standard deviation is zero
Code
# Upper-triangle correlation plot: circles for each pair of variables with
# the coefficient printed on top; the diagonal is omitted.
corrplot(
  cor_matrix,
  method = "circle",
  type = "upper",
  diag = FALSE,
  tl.col = "black",
  tl.srt = 45,
  addCoef.col = "black",
  number.cex = 0.7
)

Further Directions and Conclusion

The HINTS dataset provides insight into perceptions of healthcare and cancer information. The analysis of these trends will be repeated with the Reddit dataset. Using the Reddit dataset, we will explore sentiments such as positive/negative sentiment, frustration, and trust. We will also look at word frequency counts to review which topics Reddit users commonly comment about.